Fix later - and make your own


In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
%matplotlib inline

In [2]:
from sklearn.datasets import load_breast_cancer

breast_cancer = load_breast_cancer()

X = breast_cancer.data
y= breast_cancer.target

In [3]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [4]:
#Import SGD Classifier
from sklearn.linear_model import SGDClassifier

#Create instance of Random Forest Classifier
sgd = SGDClassifier()

#Fit estimator to 70% of the data
sgd.fit(X_train, y_train)

#Predict final 30%
y_pred = sgd.predict(X_test)

In [5]:
#Import Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

#Create instance of Random Forest Classifier
rfc = RandomForestClassifier()

#Fit estimator to 70% of the data
rfc.fit(X_train, y_train)

#Predict final 30%
y_pred = rfc.predict(X_test)

In [7]:
X_test.shape


Out[7]:
(171, 30)

In [10]:
"""sample_data = np.array([-10,-1,-2,3])
y_scores = sgd.decision_function(sample_data.reshape(1,-1))
y_scores
"""


Out[10]:
'sample_data = np.array([-10,-1,-2,3])\ny_scores = sgd.decision_function(sample_data.reshape(1,-1))\ny_scores\n'

In [11]:
"""
threshold=0
y_sample_data_pred = (y_scores>threshold)
y_sample_data_pred
"""


Out[11]:
'\nthreshold=0\ny_sample_data_pred = (y_scores>threshold)\ny_sample_data_pred\n'

In [17]:
from sklearn.model_selection import cross_val_predict

In [18]:
y_scores = cross_val_predict(sgd, X_train,y_train, cv=3,method='decision_function')

In [19]:
from sklearn.metrics import precision_recall_curve

precision,recall,thresholds = precision_recall_curve(y_test,y_pred)

In [20]:
plt.plot(precision,recall);



In [25]:
def plot_precision_recall_vs_threshold(precisions, recalls, thresholds):
    plt.plot(thresholds, precisions[:-1], "b--", label="Precision", linewidth=2)
    plt.plot(thresholds, recalls[:-1], "g-", label="Recall", linewidth=2)
    plt.xlabel("Threshold", fontsize=16)
    plt.legend(loc="upper left", fontsize=16)
    plt.ylim([0, 2])

plt.figure(figsize=(8, 4))
plot_precision_recall_vs_threshold(precision, recall, thresholds)
plt.xlim([0, 1])
#save_fig("precision_recall_vs_threshold_plot")
plt.show()



In [27]:
from sklearn.metrics import roc_curve

fpr, tpr, thresholds = roc_curve(y_train, y_scores)

In [32]:
def plot_roc_curve(fpr, tpr, label=None):
    plt.plot(fpr, tpr, linewidth=2, label=label)
    plt.axis([0, .5, 0, 1.01])
    plt.xlabel('False Positive Rate', fontsize=16)
    plt.ylabel('True Positive Rate', fontsize=16)

plt.figure(figsize=(8, 6))
plot_roc_curve(fpr, tpr)
#save_fig("roc_curve_plot")
plt.show()



In [ ]: